Excess mortality: 1851-2020 monthly Swedish data & regression approach
Data
Monthly number of deaths, 1851-2020.
Prepared in 02.Rmd.
sweden_deaths_monthly <- read_rds("data/Sweden/sweden_deaths_monthly.Rds") %>%
rename(deaths = deaths_month,
pop = population)Years
year_from <- min(sweden_deaths_monthly$Year)
year_reg <- year_from + 10
year_max <- 2020Pandemics
flu_years_hc <- c(1890, 1918, 2020)
flu_years_hc_affected <- c(seq(1890 + 1, 1890 + 10),
seq(1918 + 1, 1918 + 10))
flu_years <- c(1890, 1918, 1957, 1968, 1977, 2009, 2020)
flu_years_affected <- c(flu_years_hc_affected,
seq(1957 + 1, 1957 + 10),
seq(1968 + 1, 1968 + 10),
seq(1977 + 1, 1977 + 10),
seq(2009 + 1, 2009 + 10))Other vars
sweden_deaths_monthly %<>%
mutate(flu_year_hc = ifelse(Year %in% flu_years_hc, 1, 0),
flu_year_hc_affected = ifelse(Year %in% flu_years_hc_affected, 1, 0),
deaths_flu_year_hc = ifelse(Year %in% flu_years_hc, NA, deaths),
flu_year = ifelse(Year %in% flu_years, 1, 0),
flu_year_affected = ifelse(Year %in% flu_years_affected, 1, 0),
deaths_flu_year = ifelse(Year %in% flu_years, NA, deaths),
si_one = sin(2*pi*Month/12),
si_two = sin(4*pi*Month/12),
co_one = cos(2*pi*Month/12),
co_two = cos(4*pi*Month/12)) Three pandemics
With denominator
5- & 10-year medians
sweden_deaths_monthly %<>%
arrange(Month, Year) %>%
mutate(median = slide_dbl(deaths,
stats::median, na.rm = TRUE,
.before = 10, .after = -1, .complete = TRUE),
median_flu_year_hc = slide_dbl(deaths_flu_year_hc,
stats::median, na.rm = TRUE,
.before = 10, .after = -1, .complete = TRUE),
median_flu_year = slide_dbl(deaths_flu_year,
stats::median, na.rm = TRUE,
.before = 10, .after = -1, .complete = TRUE),
) %>%
arrange(Year, Month)Impact of 1918
Impact of 1957
Seasonality
dcmp <- sweden_deaths_monthly %>%
mutate(Time = row_number()) %>%
tsibble::as_tsibble(index = Time) %>%
fabletools::model(feasts::STL((deaths/pop)*100000 ~ season(window = Inf)))
# fabletools::components(dcmp)
fabletools::components(dcmp) %>%
autoplot()Outcome
Marcel’s regression approach
CI via bootstrap
Method from https://statcompute.wordpress.com/2015/12/20/prediction-intervals-for-poisson-regression/
p_load(doParallel, foreach)
registerDoParallel(cores = 4)
boot_pi <- function(model, pdata, n, p) {
odata <- model$data
lp <- (1 - p) / 2
up <- 1 - lp
seeds <- round(runif(n, 1, 1000), 0)
boot_y <- foreach(i = 1:n, .combine = rbind) %dopar% {
set.seed(seeds[i])
bdata <- odata[sample(seq(nrow(odata)), size = nrow(odata), replace = TRUE), ]
bpred <- predict(update(model, data = bdata), type = "response", newdata = pdata)
rpois(length(bpred), lambda = bpred)
}
boot_ci <- t(apply(boot_y, 2, quantile, c(lp, up)))
return(data.frame(pred = predict(model, newdata = pdata, type = "response"),
lower = boot_ci[, 1], upper = boot_ci[, 2]))
}Looping over years
Analyses using 10 previous years.
With three different inclusions depending on pandemic / flu years
for (YEAR in year_reg:year_max){
print(paste("Analysing year", YEAR))
reg_data <- sweden_deaths_monthly %>%
filter(Year >= YEAR - 10 & Year < YEAR)
pred_data <- sweden_deaths_monthly %>%
filter(Year == YEAR) %>%
mutate(
fit = NA_real_,
lpi = NA_real_,
upi = NA_real_,
fit_flu_hc = NA_real_,
lpi_flu_hc = NA_real_,
upi_flu_hc = NA_real_,
fit_flu = NA_real_,
lpi_flu = NA_real_,
upi_flu = NA_real_)
# summary(reg_data$Year)
# summary(pred_data$Year)
# all data
summary(monthly <- glm(deaths ~ Year +
si_one + si_two + co_one + co_two,
offset = log(pop),
data = reg_data,
family = "poisson"))
predict <- boot_pi(monthly, pred_data, 1000, 0.99)
pred_data %<>%
mutate(
fit = predict$pred,
lpi = predict$lower,
upi = predict$upper,
fit_flu_hc = predict$pred,
lpi_flu_hc = predict$lower,
upi_flu_hc = predict$upper,
fit_flu = predict$pred,
lpi_flu = predict$lower,
upi_flu = predict$upper
)
# flu hc excluded
if (YEAR %in% flu_years_hc_affected) {
print(paste(" >> Extra analyses for hc flu year exclusions"))
summary(monthly_flu_hc <- glm(deaths_flu_year_hc ~ Year +
si_one + si_two + co_one + co_two,
offset = log(pop),
data = reg_data,
family = "poisson"))
predict_flu_hc <- boot_pi(monthly_flu_hc, pred_data, 1000, 0.99)
pred_data %<>%
mutate(
fit_flu_hc = predict_flu_hc$pred,
lpi_flu_hc = predict_flu_hc$lower,
upi_flu_hc = predict_flu_hc$upper
)
}
# all flu excluded
if (YEAR %in% flu_years_affected) {
print(paste(" >> Extra analyses for flu year exclusions"))
summary(monthly_flu <- glm(deaths_flu_year ~ Year +
si_one + si_two + co_one + co_two,
offset = log(pop),
data = reg_data,
family = "poisson"))
predict_flu <- boot_pi(monthly_flu, pred_data, 1000, 0.99)
pred_data %<>%
mutate(
fit_flu = predict_flu$pred,
lpi_flu = predict_flu$lower,
upi_flu = predict_flu$upper
)
}
if (YEAR == year_reg) {
se_expected_deaths_monthly <- pred_data
} else {
se_expected_deaths_monthly <- bind_rows(se_expected_deaths_monthly, pred_data)
}
}
rm(predict, predict_flu_hc, predict_flu, pred_data, reg_data, YEAR)
write_rds(se_expected_deaths_monthly, "data/se_expected_deaths_monthly.Rds")
haven::write_dta(se_expected_deaths_monthly, "data/se_expected_deaths_monthly.dta")Two predictions
1890
With extremes
Without extremes
1918
With extremes
Without extremes
1957
With extremes
Without extremes
2020
With extremes
Without extremes
Alt Viz
Animate
Stacked
Diffs 1
Diffs 2
Computing Environment
R version 4.0.4 (2021-02-15) Platform: x86_64-w64-mingw32/x64 (64-bit) Running under: Windows 10 x64 (build 18363) Matrix products: default attached base packages: [1] parallel stats graphics grDevices utils datasets methods [8] base other attached packages: [1] iterators_1.0.13 slider_0.2.1 magrittr_2.0.1 scales_1.1.1 [5] lubridate_1.7.10 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.5 [9] purrr_0.3.4 readr_1.4.0 tidyr_1.1.3 tibble_3.1.1 [13] ggplot2_3.3.3 tidyverse_1.3.1 pacman_0.5.1To cite R in publication use:
R Core Team (2021). R: A Language and Environment for Statistical Computing. R Foundation for Statistical Computing, Vienna, Austria. https://www.R-project.org/.